## Please set a working directory to the folder, SP_journal_plot.
setwd("/Users/xxxx/Downloads/SP_journal_plot") # xxxx = your personal path
source("000-utils.R")

# process atop ------------------------------------------------------------
# Build the table of defense-allied country pairs from atop5_1dy.dta:
# one row per pair and year, restricted to 1990-2018 and defense == 1.
# Each pair is labelled "<iso3c>_<iso3c>" with the member that has the
# smaller COW number first, so a pair gets the same label no matter which
# side it was recorded on.
is_defense_ally <-
  rio::import(file.path(base_path, "atop5_1dy.dta")) %>%
  filter(year %in% 1990:2018, defense == 1) %>%
  mutate(
    # Translate both members' COW numbers to ISO3C with a single converter,
    # so the override table cannot drift between the two sides of the pair.
    across(
      c(mem1, mem2),
      function(cow_code) {
        countrycode::countrycode(
          cow_code,
          "cown",
          "iso3c",
          # COW codes that are given an explicit ISO3C target.
          custom_match = c(
            "260" = "DEU",
            "265" = "DEU",
            "315" = "CZE",
            "678" = "YEM",
            "680" = "YEM"
          )
        )
      },
      .names = "{.col}_iso3c"
    ),
    # If the comparison is NA (a COW number is missing), keep the recorded
    # order instead of propagating NA into the label.
    mem1_first = replace_na(mem1 < mem2, TRUE),
    pair_iso_code = paste(
      ifelse(mem1_first, mem1_iso3c, mem2_iso3c),
      ifelse(mem1_first, mem2_iso3c, mem1_iso3c),
      sep = "_"
    )
  ) %>%
  # Only these three columns are used downstream; the helpers are dropped.
  select(year, pair_iso_code, defense)

# ISO3C codes of every country paired with the USA in is_defense_ally.
# The year is dropped first, so a country is listed if it is paired with the
# USA in any of the years kept there.
usa_ally <- is_defense_ally %>%
  select(!year) %>%
  filter(grepl("USA", pair_iso_code)) %>%
  unique() %>%
  # Strip the USA side and the "_" separator wherever they sit in the label,
  # so the partner is recovered from "USA_XXX" and "XXX_USA" alike instead of
  # depending on the USA always being the first member.
  mutate(country = str_remove_all(pair_iso_code, "USA|_")) %>%
  pull(country)


# making pairdata ---------------------------------------------------------
# Pair data returned by gen_pair(db_file), limited to years before 2021 and
# extended with two alliance flags.
# NOTE(review): both flags ignore the year, so a pair is flagged for the
# whole period if it matches in any year kept upstream - confirm this is
# intended.
sp_count_data <- gen_pair(db_file) %>%
  filter(year < 2021) %>%
  # defense: the pair itself is listed in is_defense_ally
  mutate(defense = pair_iso_code %in% is_defense_ally$pair_iso_code) %>%
  # usa_defense: at least one member of the pair is listed in usa_ally
  mutate(
    usa_defense =
      (country1_iso_code %in% usa_ally) | (country2_iso_code %in% usa_ally)
  )

# reverse count strategy --------------------------------------------------
# Query each country in each year.
# Complete data should contain both directions of every pair,
# A -> B and B -> A. However, in some cases only one direction is present.
# In that case, I reverse the data, turning A -> B into B -> A and vice versa.
# When the original and the reversed data are combined, a B -> A row that
# already exists is dropped as a duplicate when the combined data is
# de-duplicated; otherwise the reversed row is preserved.
# Signed countries in every year.

# Keep only the columns needed for the per-country counts.
selected_sp_data <- sp_count_data %>%
  select(country1_iso_code, country2_iso_code, year, month, defense)

# Mirror every row: the two country columns trade places (a renaming select
# swaps them in one step), all other columns are carried over unchanged and
# the column order stays the same.
rev_unique_sp <- selected_sp_data %>%
  select(
    country1_iso_code = country2_iso_code,
    country2_iso_code = country1_iso_code,
    year,
    month,
    defense
  )

# Stack the original rows on top of their mirrors and drop exact duplicate
# rows, so every pair is listed from both members' point of view.
complete_unique_data <-
  bind_rows(selected_sp_data, rev_unique_sp) %>%
  unique()

# Number of rows each country has in complete_unique_data per year, i.e. one
# count per distinct (partner, month, defense) combination in that year.
# `.groups = "drop_last"` states explicitly what summarise() otherwise does
# implicitly (and announces with a message on every run): the result stays
# grouped by country1_iso_code only.
count_country_yearly_signed_data <- complete_unique_data %>%
  group_by(country1_iso_code, year) %>%
  summarise(signed_country_num = n(), .groups = "drop_last")

# Per-country totals over all years:
#   signed_country_num - number of rows the country has in
#                        complete_unique_data
#   allign             - sum of the defense flag over those rows
# NOTE(review): rows are counted as they are in complete_unique_data (one
# per partner, year and month), so a partner that appears on several dates
# is counted several times - confirm that this is the intended meaning of
# "signed country".
count_country_signed_data <- complete_unique_data %>%
  group_by(country1_iso_code) %>%
  summarise(signed_country_num = n(), allign = sum(defense))


# Signed contracts every year ---------------------------------------------
# Find out how many contracts each country signed in each year

# Every country code that appears on either side of a pair in sp_count_data,
# listed once.
all_country <- with(
  sp_count_data,
  unique(c(country1_iso_code, country2_iso_code))
)

# For every country in every year 1990-2020, count the rows of sp_count_data
# the country is involved in:
#   times          - all rows of that year whose pair label contains the
#                    country
#   ally_times     - those rows with defense == 1
#   usa_ally_times - those rows whose other member is listed in usa_ally
# The work is wrapped in local() so the helpers below do not leak into the
# script's workspace.
country_year_signed_sp_complete <- local({
  # Columns of sp_count_data that every (country, year) row is checked
  # against; extracted once because they do not change between rows.
  sp_year <- sp_count_data$year
  sp_pair <- sp_count_data$pair_iso_code
  sp_with_ally <- sp_count_data$defense == 1

  # Counts for one country in one year, returned as
  # c(times, ally_times, usa_ally_times).
  count_signed <- function(yr, iso) {
    # Rows of year `yr` whose pair label contains `iso`; `iso` is passed to
    # grepl() as a regular expression. Computed once and reused for all
    # three counts.
    involved <- sp_year == yr & grepl(iso, sp_pair)
    # What remains of the pair label after removing `iso` and "_", i.e. the
    # other member of the pair.
    partner <- str_remove_all(sp_pair, sprintf("%s|_", iso))
    c(
      sum(involved),
      sum(involved & sp_with_ally),
      sum(involved & partner %in% usa_ally)
    )
  }

  # Long table with one row for every country in every year.
  country_year <- complete(tibble(year = 1990, country = "USA"),
    year = 1990:2020,
    country = all_country
  )

  # Integer matrix with 3 rows and one column per row of country_year.
  counts <- vapply(
    seq_len(nrow(country_year)),
    function(i) {
      count_signed(country_year$year[[i]], country_year$country[[i]])
    },
    integer(3)
  )

  country_year %>%
    mutate(
      times = counts[1, ],
      ally_times = counts[2, ],
      usa_ally_times = counts[3, ]
    )
})




# Add running totals per country. cumsum() follows the row order of
# country_year_signed_sp_complete within each country.
country_year_signed_sp_sum <-
  group_by(country_year_signed_sp_complete, country) %>%
  mutate(
    # running totals: all signings, and signings with a defense ally
    country_cum = cumsum(times),
    ally_cum = cumsum(ally_times),
    # signings that are not with a defense ally: per year, then running
    non_ally_times = times - ally_times,
    non_ally_cum = country_cum - ally_cum,
    # running lead of non-ally signings over ally signings
    distance_ally_non_ally = non_ally_cum - ally_cum,
    # running total of signings whose partner is in usa_ally
    usa_ally_cum = cumsum(usa_ally_times)
  ) %>%
  ungroup()
